root.dir <- here::here()
knitr::opts_chunk$set(echo = T, root.dir=root.dir)
knitr::opts_knit$set(root.dir=root.dir)

library(dplyr)
library(ggplot2)

library(echolocatoR)

Import fine-mapping results

Query fine-mapping portal

Download Alzheimer’s Disease GWAS fine-mapping results for the APOE locus via the echolocatoR Fine-mapping Portal API.

local_finemap <- echolocatoR::GITHUB.portal_query(dataset_types = "GWAS", 
                                                 phenotypes = "Alzheimer", 
                                                 LD_panels = c("UKB", "1KGphase3"),
                                                 loci = "APOE", 
                                                 file_types = "multi_finemap", 
                                                 results_dir = "./data", 
                                                 overwrite = F)
## [1] "Fetching echolocatoR Fine-mapping Portal study metadata."
## [1] "+ 4 datasets remain after filtering."
## [1] "+ Searching for multi_finemap files..."
## [1] "+ 2 unique files identified."
## [1] "+ Downloading 2 files..."
## [1] "+ Returning local file paths."

Merge fine-mapping results

finemap_dat <- lapply(local_finemap,function(x){
  dataset <- basename(dirname(dirname(dirname(x))))
  LD_ref <- stringr::str_split(basename(x),pattern = "[.]")[[1]][2]
  printer("Importing",dataset)
  dat <- data.table::fread(x) 
  cbind(dataset=dataset, 
        LD_ref=LD_ref, 
        dat)
}) %>% data.table::rbindlist()
## [1] "Importing Jansen_2018"
## [1] "Importing Marioni_2018"

Colocalization

Import colocalization results from MiGA preprint:

Atlas of genetic effects in human microglia transcriptome across brain regions, aging and disease pathologies

Katia de Paiva Lopes, Gijsje J. L. Snijders, Jack Humphrey, Amanda Allan, Marjolein Sneeboer, Elisa Navarro, Brian M. Schilder, Ricardo A. Vialle, Madison Parks, Roy Missall, Welmoed van Zuiden, Frederieke Gigase, Raphael Kübler, Amber Berdenis van Berlekom, Chotima Böttcher, Josef Priller, René S. Kahn, Lot D. de Witte, Towfique Raj

bioRxiv 2020.10.27.356113; doi: https://doi.org/10.1101/2020.10.27.356113

AD coloc results: all loci

coloc_res <- data.table::fread(here::here("data/MiGA/all_COLOC_results.Microglia_all_regions.snp-level.leadSNP.PPH4_filtered.tsv.gz"))

meta <- echolocatoR::GITHUB.portal_metadata()
## [1] "Fetching echolocatoR Fine-mapping Portal study metadata."
AD_studies <- subset(meta, phenotype=="Alzheimer's Disease")$dataset
coloc_AD <- subset(coloc_res, Study %in% AD_studies)
dim(coloc_AD)
## [1] 42 43
createDT(coloc_AD)

AD coloc results: APOE locus

No genes in microglia eQTL are colocalized with the APOE locus in AD GWAS at PP.H4>0.8.

subset(coloc_AD, Locus=="APOE")
## Empty data.table (0 rows and 43 cols): Study,Dataset,Locus,Gene,snp,V.df1...

Let’s instead ask what the top colocalized gene in the APOE locus is per study.

Using this approach, the top eQTL gene is FKRP.

coloc_res <- data.table::fread(here::here("data/MiGA/all_COLOC_results.Microglia_all_regions.snp-level.leadSNP.tsv.gz"))
top_coloc <- subset(coloc_res, Study %in% AD_studies & Locus=="APOE") %>%
  dplyr::group_by(Study, Locus) %>%
  dplyr::slice_max(SNP.PP.H4, n = 1)
createDT(top_coloc)

CatalogueR

Microglia do seem to be quite important in AD, but the microglia-specific QTL studies are currently available are underpowered. Let’s take a different strategy and query the eQTL Catalogue via catalogueR.

library(catalogueR)

Jansen_2018

gwas.qtl_paths.Jansen_2018 <- catalogueR::eQTL_Catalogue.query(sumstats_paths = local_finemap[1], 
                                 output_dir = "data/catalogueR_queries_Jansen_2018", 
                                 split_files = T,
                                 qtl_search = c("brain","monocyte","macrophage"),
                                 conda_env ="echoR", 
                                 nThread = 10)

Marioni_2018

gwas.qtl_paths.Marioni_2018 <- catalogueR::eQTL_Catalogue.query(sumstats_paths = local_finemap[2], 
                                 output_dir = "data/catalogueR_queries_Marioni_2018", 
                                 split_files = T,
                                 qtl_search = c("brain","monocyte","macrophage"),
                                 conda_env ="echoR", 
                                 force_new_subset = F,
                                 nThread = 10)

Run coloc

Get sample sizes

local_meta <- "data/GWAS-QTL_data_dictionary.xlsx"
if(!file.exists(local_meta)){
  download.file("https://github.com/RajLabMSSM/Fine_Mapping_Shiny/raw/master/www/metadata/GWAS-QTL_data_dictionary.xlsx",destfile = local_meta)
}
meta2 <- xlsx::read.xlsx2(here::here("data/GWAS-QTL_data_dictionary.xlsx"), sheetName = "GWAS")
meta2$N <- as.numeric(meta2$N)

Jansen_2018

# Gather paths
gwas.qtl_paths.Jansen_2018 <- list.files("data/catalogueR_queries_Jansen_2018/",
                                         pattern = ".tsv.gz",
                                         recursive = T, full.names = T) 

coloc_QTLs.Jansen_2018 <- catalogueR::run_coloc(gwas.qtl_paths = gwas.qtl_paths.Jansen_2018, 
                      save_path = "data/catalogueR_queries_Jansen_2018/coloc.Jansen_2018.tsv.gz", 
                      gwas_sample_size = subset(meta2, dataset=="Jansen_2018")$N,
                      nThread = 10)

Marioni_2018

# Gather paths
gwas.qtl_paths.Marioni_2018 <- list.files("data/catalogueR_queries_Marioni_2018/",
                                         pattern = ".tsv.gz",
                                         recursive = T, full.names = T) 

coloc_QTLs.Marioni_2018 <- catalogueR::run_coloc(gwas.qtl_paths = gwas.qtl_paths.Marioni_2018, 
                      save_path = "data/catalogueR_queries_Marioni_2018/coloc.Jansen_2018.tsv.gz", 
                      gwas_sample_size = subset(meta2, dataset=="Marioni_2018")$N,
                      nThread = 10)

Merge results

coloc_QTLs.Jansen_2018 <- data.table::fread(here::here("data/catalogueR_queries_Jansen_2018/coloc.Jansen_2018.tsv.gz"), nThread = 10)
coloc_QTLs.Marioni_2018 <- data.table::fread(here::here("data/catalogueR_queries_Marioni_2018/coloc.Marioni_2018.tsv.gz"), nThread = 10)

coloc_res <- rbind(cbind(dataset="Jansen_2018",
                         coloc_QTLs.Jansen_2018),
                   cbind(dataset="Marioni_2018",
                         coloc_QTLs.Marioni_2018), fill=T)

top_coloc <- coloc_res %>% 
   dplyr::select(dataset, Locus.GWAS, PP.H4, snp, SNP.PP.H4, qtl_id, gene.QTL) %>%
  unique() %>%
  subset(PP.H4>0.5) %>%
  dplyr::group_by(dataset, Locus.GWAS, snp) %>% 
  dplyr::slice_max(order_by = PP.H4, n=3 )
# createDT(top_coloc)

Fine-mapped QTLs

The eQTL Catalogue team recently fine-mapped their entire database (using the tool FINEMAP). We can check for overlap between causal variants here.

Problem : None of the Consensus or Support>0 SNPs seem to be present in the eQTL Catalogue fine-mapping data (filtering choices before fine-mapping?). Thus, this approach isn’t very helpful in this case.

library(GenomicRanges)
ftp_path <- file.path("ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/credible_sets",
                      "BLUEPRINT_SE.monocyte_ge.purity_filtered.txt.gz"
          # "GTEx.brain_cortex_ge.purity_filtered.txt.gz"
          )
ftp_cs <- data.table::fread(ftp_path, nThread = 10)
gr.cs <- subset(ftp_cs, pip>0) %>%
  GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = T, 
                                          seqnames.field = "chr", 
                                          start.field = "pos", end.field = "pos") 
# catalogueR:::rsids_from_coords(gr.cs, genome_build = "hg38")
gr.finemap <- catalogueR::liftover(gwas_data = finemap_dat,
                                     build.conversion = "hg19.to.hg38")
## [1] "XGR:: Lifting genome build: hg19.to.hg38"
## Start at 2021-04-09 00:47:09
## 
## First, import the files formatted as 'GRanges' (2021-04-09 00:47:09) ...
##  import the data file (2021-04-09 00:47:09) ...
## Second, construct GenomicRanges object (2021-04-09 00:47:09) ...
## Third, lift intervals between genome builds 'hg19.to.hg38' (2021-04-09 00:47:09) ...
## Start at 2021-04-09 00:47:09
## 
## 'chain' (from http://galahad.well.ox.ac.uk/bigdata/chain.RData) has been loaded into the working environment (at 2021-04-09 00:47:10)
## 
## End at 2021-04-09 00:47:10
## Runtime in total is: 1 secs
## 
## End at 2021-04-09 00:47:10
## Runtime in total is: 1 secs
suppressWarnings(GenomeInfoDb::seqlevelsStyle(gr.finemap) <- "NCBI")


#### Find any eQTL Catalogue CS within the range ####
gr.range <- subset(gr.cs, 
       (seqnames(gr.cs)==19) & 
       (start(gr.cs)>=min(start(gr.finemap))) &
       (end(gr.cs)<=max(start(gr.finemap))))
gene_dict <- catalogueR::ensembl_to_hgnc(gr.range$phenotype_id)
## [1] "++ Converting: Ensembl IDs ==> HGNC gene symbols"
gr.range$gene <- gene_dict[gr.range$phenotype_id]


#### Find overlap in causal SNPs ####
gr.hits <- echolocatoR::GRanges_overlap(gr.finemap, gr.cs)
## 698 query SNP(s) detected with reference overlap.
gene_dict <- catalogueR::ensembl_to_hgnc(gr.hits$phenotype_id)
## [1] "++ Converting: Ensembl IDs ==> HGNC gene symbols"
gr.hits$gene <- gene_dict[gr.hits$phenotype_id]

Summarise

  • Top candidates:

    • Cell-type: microglia

    • Gene: FOSB

    • SNP: rs11556505

  • rs11556505 has previously been linked to AD and hippocampal degradation.

  • This SNP falls within an exon of TOMM40, which is a gene immediately next to APOE.

  • Colocalization analyses nominated FOSB instead, which is ~500Mb+ downstream of TOMM40.

  • and was shown to be somewhat correlated with HC volume and free-recall memory in humans. However they attributed the effect of rs11556505 to TOMM40.

    The influence of APOE and TOMM40 polymorphisms on hippocampal volume and episodic memory in old age, https://www.frontiersin.org/articles/10.3389/fnhum.2013.00198/full

# -   Merge the nominated cell-types (from epigenomic overlap) with the nominated genes (from colocalization) by SNPs.
# candidates <- merge(snp_overlap_counts,  
#                     top_coloc,  
#                     all.y = T,
#                     by.x = "SNP", by.y = "snp")
createDT(top_coloc) 

Session info

utils::sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4  IRanges_2.24.1      
## [4] S4Vectors_0.28.1     BiocGenerics_0.36.0  catalogueR_0.1.0    
## [7] echolocatoR_0.1.2    ggplot2_3.3.3        dplyr_1.0.5         
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1             Hmisc_4.5-0                
##   [3] BiocFileCache_1.14.0        plyr_1.8.6                 
##   [5] lazyeval_0.2.2              splines_4.0.4              
##   [7] crosstalk_1.1.1             BiocParallel_1.24.1        
##   [9] digest_0.6.27               ensembldb_2.14.0           
##  [11] htmltools_0.5.1.1           fansi_0.4.2                
##  [13] magrittr_2.0.1              checkmate_2.0.0            
##  [15] memoise_2.0.0               xlsx_0.6.5                 
##  [17] BSgenome_1.58.0             cluster_2.1.1              
##  [19] Biostrings_2.58.0           matrixStats_0.58.0         
##  [21] R.utils_2.10.1              ggbio_1.38.0               
##  [23] askpass_1.1                 prettyunits_1.1.1          
##  [25] jpeg_0.1-8.1                colorspace_2.0-0           
##  [27] blob_1.2.1                  rappdirs_0.3.3             
##  [29] xfun_0.22                   crayon_1.4.1               
##  [31] RCurl_1.98-1.3              jsonlite_1.7.2             
##  [33] graph_1.68.0                Exact_2.1                  
##  [35] survival_3.2-7              VariantAnnotation_1.36.0   
##  [37] glue_1.4.2                  gtable_0.3.0               
##  [39] zlibbioc_1.36.0             XVector_0.30.0             
##  [41] DelayedArray_0.16.3         scales_1.1.1               
##  [43] mvtnorm_1.1-1               DBI_1.1.1                  
##  [45] GGally_2.1.1                Rcpp_1.0.6                 
##  [47] progress_1.2.2              htmlTable_2.1.0            
##  [49] foreign_0.8-81              bit_4.0.4                  
##  [51] proxy_0.4-25                OrganismDbi_1.32.0         
##  [53] Formula_1.2-4               DT_0.17                    
##  [55] htmlwidgets_1.5.3           httr_1.4.2                 
##  [57] RColorBrewer_1.1-2          ellipsis_0.3.1             
##  [59] rJava_0.9-13                R.methodsS3_1.8.1          
##  [61] pkgconfig_2.0.3             reshape_0.8.8              
##  [63] XML_3.99-0.6                nnet_7.3-15                
##  [65] sass_0.3.1                  dbplyr_2.1.0               
##  [67] utf8_1.2.1                  here_1.0.1                 
##  [69] tidyselect_1.1.0            rlang_0.4.10               
##  [71] reshape2_1.4.4              AnnotationDbi_1.52.0       
##  [73] munsell_0.5.0               tools_4.0.4                
##  [75] cachem_1.0.4                generics_0.1.0             
##  [77] RSQLite_2.2.4               evaluate_0.14              
##  [79] stringr_1.4.0               fastmap_1.1.0              
##  [81] EnsDb.Hsapiens.v75_2.99.0   yaml_2.2.1                 
##  [83] knitr_1.31                  bit64_4.0.5                
##  [85] purrr_0.3.4                 AnnotationFilter_1.14.0    
##  [87] rootSolve_1.8.2.1           RBGL_1.66.0                
##  [89] R.oo_1.24.0                 xml2_1.3.2                 
##  [91] biomaRt_2.46.3              compiler_4.0.4             
##  [93] rstudioapi_0.13             curl_4.3                   
##  [95] png_0.1-7                   e1071_1.7-6                
##  [97] tibble_3.1.0                bslib_0.2.4                
##  [99] DescTools_0.99.40           stringi_1.5.3              
## [101] GenomicFeatures_1.42.2      lattice_0.20-41            
## [103] ProtGenerics_1.22.0         Matrix_1.3-2               
## [105] vctrs_0.3.6                 pillar_1.5.1               
## [107] lifecycle_1.0.0             BiocManager_1.30.10        
## [109] jquerylib_0.1.3             data.table_1.13.6          
## [111] bitops_1.0-6                lmom_2.8                   
## [113] rtracklayer_1.50.0          R6_2.5.0                   
## [115] latticeExtra_0.6-29         gridExtra_2.3              
## [117] gld_2.6.2                   dichromat_2.0-0            
## [119] boot_1.3-27                 MASS_7.3-53.1              
## [121] assertthat_0.2.1            xlsxjars_0.6.1             
## [123] SummarizedExperiment_1.20.0 openssl_1.4.3              
## [125] rprojroot_2.0.2             withr_2.4.1                
## [127] GenomicAlignments_1.26.0    Rsamtools_2.6.0            
## [129] GenomeInfoDbData_1.2.4      expm_0.999-6               
## [131] hms_1.0.0                   grid_4.0.4                 
## [133] rpart_4.1-15                tidyr_1.1.3                
## [135] class_7.3-18                rmarkdown_2.7              
## [137] MatrixGenerics_1.2.1        biovizBase_1.38.0          
## [139] Biobase_2.50.0              base64enc_0.1-3